In [ ]:
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Prerequisites:
1. Familiarity with Python
2. Completed Chapter 2: Wide Convolutional Models
Objectives:
1. Code a naive Inception module
2. Code an Inception V1 block
3. Refactor an Inception V1 block
4. Code a mini Wide Residual Network (WRN)
Let's code a naive Inception module.
You fill in the blanks (replace the ??) and make sure the code passes the Python interpreter.
You will need to:
1. Create the four branches.
2. Implement each parallel branch.
3. Concatenate the output from each branch into a single output for the module.
In [ ]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, ReLU, BatchNormalization, MaxPooling2D, Concatenate, SeparableConv2D
def naive_inception(inputs):
    # pooling branch
    # HINT: the feature map size must stay the same, so don't downsample, and remember the padding
    x1 = MaxPooling2D((2, 2), ??)(inputs)
    # 1x1 branch
    x2 = Conv2D(64, (1, 1), strides=1, padding='same', activation='relu')(inputs)
    # 3x3 branch
    # HINT: should look like the 1x1 convolution, except it uses a 3x3 kernel
    x3 = ??
    # 5x5 branch
    x4 = Conv2D(64, (5, 5), strides=1, padding='same', activation='relu')(inputs)
    # Concatenate the output from the four branches together
    # HINT: should be a list of the four branch outputs (x...)
    outputs = Concatenate()([??])
    return outputs

inputs = Input((32, 32, 3))
outputs = naive_inception(inputs)
model = Model(inputs, outputs)
It should look like below:
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_3 (InputLayer) [(None, 32, 32, 3)] 0
__________________________________________________________________________________________________
max_pooling2d_2 (MaxPooling2D) (None, 32, 32, 3) 0 input_3[0][0]
__________________________________________________________________________________________________
conv2d_6 (Conv2D) (None, 32, 32, 64) 256 input_3[0][0]
__________________________________________________________________________________________________
conv2d_7 (Conv2D) (None, 32, 32, 64) 1792 input_3[0][0]
__________________________________________________________________________________________________
conv2d_8 (Conv2D) (None, 32, 32, 64) 4864 input_3[0][0]
__________________________________________________________________________________________________
concatenate_2 (Concatenate) (None, 32, 32, 195) 0 max_pooling2d_2[0][0]
conv2d_6[0][0]
conv2d_7[0][0]
conv2d_8[0][0]
==================================================================================================
Total params: 6,912
Trainable params: 6,912
Non-trainable params: 0
In [ ]:
model.summary()
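If you get stuck, here is one way to fill in the blanks. It is a sketch, not the only valid answer (the name naive_inception_solution is just for illustration); it uses the imports from the exercise cell and reproduces the summary above.
In [ ]:
def naive_inception_solution(inputs):
    # pooling branch: stride 1 plus 'same' padding keep the feature map at 32x32
    x1 = MaxPooling2D((2, 2), strides=(1, 1), padding='same')(inputs)
    # 1x1 branch
    x2 = Conv2D(64, (1, 1), strides=1, padding='same', activation='relu')(inputs)
    # 3x3 branch: same as the 1x1 branch, but with a 3x3 kernel
    x3 = Conv2D(64, (3, 3), strides=1, padding='same', activation='relu')(inputs)
    # 5x5 branch
    x4 = Conv2D(64, (5, 5), strides=1, padding='same', activation='relu')(inputs)
    # concatenate the four branch outputs along the channel axis: 3 + 64 + 64 + 64 = 195
    outputs = Concatenate()([x1, x2, x3, x4])
    return outputs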
Next, let's code an Inception V1 block. It improves on the naive module by adding a 1x1 bottleneck convolution to the pooling, 3x3, and 5x5 branches. As before, replace the ?? blanks.
In [ ]:
def inception_block(inputs):
    # pooling branch
    x1 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(inputs)
    # Add a 1x1 bottleneck convolution with 64 filters
    # HINT: the output shape should not change (think of strides and padding)
    x1 = Conv2D(64, (1, 1), ??)(x1)
    # 1x1 branch
    x2 = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(inputs)
    # 3x3 branch
    # Add a 1x1 bottleneck convolution of 64 filters
    # HINT: the input should be the input to the block
    x3 = ??
    x3 = Conv2D(96, (3, 3), strides=(1, 1), padding='same', activation='relu')(x3)
    # 5x5 branch
    # Add a 1x1 bottleneck convolution of 64 filters
    # HINT: the input should be the input to the block
    x4 = ??
    x4 = Conv2D(48, (5, 5), strides=(1, 1), padding='same', activation='relu')(x4)
    # Concatenate the output from the four branches together
    outputs = Concatenate()([x1, x2, x3, x4])
    return outputs

inputs = Input((32, 32, 3))
outputs = inception_block(inputs)
model = Model(inputs, outputs)
It should look like below:
Model: "model_3"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_8 (InputLayer) [(None, 32, 32, 3)] 0
__________________________________________________________________________________________________
max_pooling2d_7 (MaxPooling2D) (None, 32, 32, 3) 0 input_8[0][0]
__________________________________________________________________________________________________
conv2d_29 (Conv2D) (None, 32, 32, 64) 256 input_8[0][0]
__________________________________________________________________________________________________
conv2d_31 (Conv2D) (None, 32, 32, 64) 256 input_8[0][0]
__________________________________________________________________________________________________
conv2d_27 (Conv2D) (None, 32, 32, 64) 256 max_pooling2d_7[0][0]
__________________________________________________________________________________________________
conv2d_28 (Conv2D) (None, 32, 32, 64) 256 input_8[0][0]
__________________________________________________________________________________________________
conv2d_30 (Conv2D) (None, 32, 32, 96) 55392 conv2d_29[0][0]
__________________________________________________________________________________________________
conv2d_32 (Conv2D) (None, 32, 32, 48) 76848 conv2d_31[0][0]
__________________________________________________________________________________________________
concatenate_7 (Concatenate) (None, 32, 32, 272) 0 conv2d_27[0][0]
conv2d_28[0][0]
conv2d_30[0][0]
conv2d_32[0][0]
==================================================================================================
Total params: 133,264
Trainable params: 133,264
Non-trainable params: 0
In [ ]:
model.summary()
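One possible completion of the Inception V1 block (a sketch; the name inception_block_solution is just for illustration). Each bottleneck keeps the 32x32 shape via stride 1 and 'same' padding:
In [ ]:
def inception_block_solution(inputs):
    # pooling branch, followed by a 1x1 bottleneck on the pooled output
    x1 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(inputs)
    x1 = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(x1)
    # 1x1 branch
    x2 = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(inputs)
    # 3x3 branch: 1x1 bottleneck on the block input, then the 3x3 convolution
    x3 = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(inputs)
    x3 = Conv2D(96, (3, 3), strides=(1, 1), padding='same', activation='relu')(x3)
    # 5x5 branch: 1x1 bottleneck on the block input, then the 5x5 convolution
    x4 = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(inputs)
    x4 = Conv2D(48, (5, 5), strides=(1, 1), padding='same', activation='relu')(x4)
    outputs = Concatenate()([x1, x2, x3, x4])
    return outputs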
Let's refactor the Inception V1 block, where:
1. The 5x5 parallel convolution is replaced by two sequential 3x3 convolutions (B(3,3)).
2. The 3x3 convolution is replaced by a pair of spatially separable convolutions (3x1 and 1x3).
A quick parameter count after the checklist below shows why this refactoring saves weights.
You will need to:
1. Add the parallel spatially separable 3x1 and 1x3 convolutions.
2. Concatenate the outputs from the separable convolutions.
3. Add the two sequential 3x3 convolutions.
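To see the savings, count the convolution weights by hand (biases omitted). The cell below is plain arithmetic, not part of the exercise; the numbers line up with the model summaries in this notebook.
In [ ]:
# 5x5 branch (64 bottleneck channels in, 48 filters out)
print(5*5*64*48)                # 76,800 weights for a single 5x5 convolution
print(3*3*64*48 + 3*3*48*48)    # 48,384 weights for two stacked 3x3 convolutions
# 3x3 branch (64 bottleneck channels in, 96 filters out)
print(3*3*64*96)                # 55,296 weights for a single 3x3 convolution
print(2 * (3*1*64 + 64*96))     # 12,672 weights for the depthwise-separable 3x1 and 1x3 pair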
In [ ]:
def inception_block(inputs):
    # pooling branch with a 1x1 bottleneck on the pooled output
    x1 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(inputs)
    x1 = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(x1)
    # 1x1 branch
    x2 = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(inputs)
    # 3x3 branch
    x3 = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(inputs)
    # Add two parallel spatially separable convolutions (3x1 and 1x3) with 96 filters
    # HINT: use SeparableConv2D. The input to both convolutions is the same, i.e., the output
    # from the prior 1x1 bottleneck.
    x3_a = ??
    x3_b = ??
    # Concatenate the outputs from the spatially separable convolutions
    # HINT: x3 was split into a and b; let's put them back together.
    x3 = Concatenate()([??])
    # 5x5 branch replaced by two sequential 3x3
    x4 = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(inputs)
    # Add two sequential 3x3 normal convolutions with 48 filters
    # HINT: the first takes x4 as input; the second takes the output of the first (x4 after reassignment).
    x4 = ??
    x4 = ??
    outputs = Concatenate()([x1, x2, x3, x4])
    return outputs

inputs = Input((32, 32, 3))
outputs = inception_block(inputs)
model = Model(inputs, outputs)
inputs = Input((32, 32, 3))
outputs = inception_block(inputs)
model = Model(inputs, outputs)
It should look like below. Note how the number of parameters after refactoring (62,368) is about half of the original block's (133,264).
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_3 (InputLayer) [(None, 32, 32, 3)] 0
__________________________________________________________________________________________________
conv2d_11 (Conv2D) (None, 32, 32, 64) 256 input_3[0][0]
__________________________________________________________________________________________________
conv2d_12 (Conv2D) (None, 32, 32, 64) 256 input_3[0][0]
__________________________________________________________________________________________________
max_pooling2d_2 (MaxPooling2D) (None, 32, 32, 3) 0 input_3[0][0]
__________________________________________________________________________________________________
separable_conv2d (SeparableConv (None, 32, 32, 96) 6432 conv2d_11[0][0]
__________________________________________________________________________________________________
separable_conv2d_1 (SeparableCo (None, 32, 32, 96) 6432 conv2d_11[0][0]
__________________________________________________________________________________________________
conv2d_13 (Conv2D) (None, 32, 32, 48) 27696 conv2d_12[0][0]
__________________________________________________________________________________________________
conv2d_9 (Conv2D) (None, 32, 32, 64) 256 max_pooling2d_2[0][0]
__________________________________________________________________________________________________
conv2d_10 (Conv2D) (None, 32, 32, 64) 256 input_3[0][0]
__________________________________________________________________________________________________
concatenate_1 (Concatenate) (None, 32, 32, 192) 0 separable_conv2d[0][0]
separable_conv2d_1[0][0]
__________________________________________________________________________________________________
conv2d_14 (Conv2D) (None, 32, 32, 48) 20784 conv2d_13[0][0]
__________________________________________________________________________________________________
concatenate_2 (Concatenate) (None, 32, 32, 368) 0 conv2d_9[0][0]
conv2d_10[0][0]
concatenate_1[0][0]
conv2d_14[0][0]
==================================================================================================
Total params: 62,368
Trainable params: 62,368
Non-trainable params: 0
__________________________________________________________________________________________________
In [ ]:
model.summary()
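One possible completion of the refactored block (a sketch; the name refactored_block_solution is just for illustration):
In [ ]:
def refactored_block_solution(inputs):
    # pooling branch with a 1x1 bottleneck
    x1 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(inputs)
    x1 = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(x1)
    # 1x1 branch
    x2 = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(inputs)
    # 3x3 branch: bottleneck, then parallel spatially separable 3x1 and 1x3 convolutions
    x3 = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(inputs)
    x3_a = SeparableConv2D(96, (3, 1), strides=(1, 1), padding='same', activation='relu')(x3)
    x3_b = SeparableConv2D(96, (1, 3), strides=(1, 1), padding='same', activation='relu')(x3)
    x3 = Concatenate()([x3_a, x3_b])
    # 5x5 branch replaced by two sequential 3x3 convolutions
    x4 = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(inputs)
    x4 = Conv2D(48, (3, 3), strides=(1, 1), padding='same', activation='relu')(x4)
    x4 = Conv2D(48, (3, 3), strides=(1, 1), padding='same', activation='relu')(x4)
    outputs = Concatenate()([x1, x2, x3, x4])
    return outputs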
Let's now code a mini version of a WRN, consisting of:
1. A stem
2. A single group of two wide residual blocks
3. A classifier
You will need to:
1. Get the value for k (the width factor) from the kwargs.
2. Pass the width factor along with the block parameters to the block method.
3. Determine the number of input channels (feature maps) for the block.
4. Complete the residual link.
5. Add the activation function for the classifier.
In [ ]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, BatchNormalization, ReLU, GlobalAveragePooling2D, Dense, Add
def stem(inputs):
    # 3x3, 16-filter stem convolution with post-activation batch norm (Conv-BN-ReLU)
    outputs = Conv2D(16, (3, 3), strides=(1, 1), padding='same')(inputs)
    outputs = BatchNormalization()(outputs)
    outputs = ReLU()(outputs)
    return outputs

def group(inputs, **params):
    # Get the kwarg blocks info.
    blocks = params['blocks']
    # Get the kwarg k (width factor)
    # HINT: it's the value of the key 'k'
    k = params[??]
    # Construct each block for this group
    outputs = inputs
    for block_params in blocks:
        # Pass the global width factor along with the block parameters
        # HINT: you extracted the key-value above
        outputs = block(outputs, **block_params, k=??)
    return outputs

def block(inputs, **params):
    n_filters = params['n_filters']
    k = params['k']
    # The input shape will not match the output shape,
    # so do a 1x1 linear projection to match the shapes.
    # HINT: the channels are the last dimension. The input is a 4D tensor: (batch, height, width, channels)
    in_channels = inputs.shape[??]
    if in_channels != n_filters:
        inputs = BatchNormalization()(inputs)
        inputs = Conv2D(n_filters, (1, 1), strides=(1, 1), padding='same')(inputs)
    # Dimensionality expansion
    outputs = BatchNormalization()(inputs)
    outputs = ReLU()(outputs)
    # Set the number of expanded filters
    # HINT: multiply the number of filters for the block by the width factor
    outputs = Conv2D(??, (3, 3), strides=(1, 1), padding='same')(outputs)
    # Dimensionality reduction
    outputs = BatchNormalization()(outputs)
    outputs = ReLU()(outputs)
    outputs = Conv2D(n_filters, (3, 3), strides=(1, 1), padding='same')(outputs)
    # Add the residual link to the outputs
    # HINT: the residual link is the (projected) input to the block
    outputs = Add()([??])
    return outputs

def classifier(inputs, n_classes):
    # Pool and flatten the feature maps into a vector of C channels
    outputs = GlobalAveragePooling2D()(inputs)
    # Add the activation function for the classifier
    # HINT: what activation is used for a multi-class classifier?
    outputs = Dense(n_classes, activation=??)(outputs)
    return outputs

inputs = Input((32, 32, 3))
outputs = stem(inputs)
outputs = group(outputs, **{ 'blocks': [ { 'n_filters': 32 }, { 'n_filters': 64 } ], 'k': 4 })
outputs = classifier(outputs, 10)
model = Model(inputs, outputs)
It should look like below:
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_6 (InputLayer) [(None, 32, 32, 3)] 0
__________________________________________________________________________________________________
conv2d_18 (Conv2D) (None, 32, 32, 16) 448 input_6[0][0]
__________________________________________________________________________________________________
batch_normalization_18 (BatchNo (None, 32, 32, 16) 64 conv2d_18[0][0]
__________________________________________________________________________________________________
re_lu_17 (ReLU) (None, 32, 32, 16) 0 batch_normalization_18[0][0]
__________________________________________________________________________________________________
batch_normalization_19 (BatchNo (None, 32, 32, 16) 64 re_lu_17[0][0]
__________________________________________________________________________________________________
conv2d_19 (Conv2D) (None, 32, 32, 32) 544 batch_normalization_19[0][0]
__________________________________________________________________________________________________
batch_normalization_20 (BatchNo (None, 32, 32, 32) 128 conv2d_19[0][0]
__________________________________________________________________________________________________
re_lu_18 (ReLU) (None, 32, 32, 32) 0 batch_normalization_20[0][0]
__________________________________________________________________________________________________
conv2d_20 (Conv2D) (None, 32, 32, 128) 36992 re_lu_18[0][0]
__________________________________________________________________________________________________
batch_normalization_21 (BatchNo (None, 32, 32, 128) 512 conv2d_20[0][0]
__________________________________________________________________________________________________
re_lu_19 (ReLU) (None, 32, 32, 128) 0 batch_normalization_21[0][0]
__________________________________________________________________________________________________
conv2d_21 (Conv2D) (None, 32, 32, 32) 36896 re_lu_19[0][0]
__________________________________________________________________________________________________
add_6 (Add) (None, 32, 32, 32) 0 conv2d_19[0][0]
conv2d_21[0][0]
__________________________________________________________________________________________________
batch_normalization_22 (BatchNo (None, 32, 32, 32) 128 add_6[0][0]
__________________________________________________________________________________________________
conv2d_22 (Conv2D) (None, 32, 32, 64) 2112 batch_normalization_22[0][0]
__________________________________________________________________________________________________
batch_normalization_23 (BatchNo (None, 32, 32, 64) 256 conv2d_22[0][0]
__________________________________________________________________________________________________
re_lu_20 (ReLU) (None, 32, 32, 64) 0 batch_normalization_23[0][0]
__________________________________________________________________________________________________
conv2d_23 (Conv2D) (None, 32, 32, 256) 147712 re_lu_20[0][0]
__________________________________________________________________________________________________
batch_normalization_24 (BatchNo (None, 32, 32, 256) 1024 conv2d_23[0][0]
__________________________________________________________________________________________________
re_lu_21 (ReLU) (None, 32, 32, 256) 0 batch_normalization_24[0][0]
__________________________________________________________________________________________________
conv2d_24 (Conv2D) (None, 32, 32, 64) 147520 re_lu_21[0][0]
__________________________________________________________________________________________________
add_7 (Add) (None, 32, 32, 64) 0 conv2d_22[0][0]
conv2d_24[0][0]
__________________________________________________________________________________________________
global_average_pooling2d (Globa (None, 64) 0 add_7[0][0]
__________________________________________________________________________________________________
dense (Dense) (None, 10) 650 global_average_pooling2d[0][0]
==================================================================================================
Total params: 375,050
Trainable params: 373,962
Non-trainable params: 1,088
__________________________________________________________________________________________________
In [ ]:
model.summary()
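If you get stuck, here is one possible completion of the WRN blanks (a sketch; the *_solution names are just for illustration, and stem needs no changes):
In [ ]:
def group_solution(inputs, **params):
    blocks = params['blocks']
    # the width factor is the value of the key 'k'
    k = params['k']
    outputs = inputs
    for block_params in blocks:
        # pass the global width factor along with the block parameters
        outputs = block_solution(outputs, **block_params, k=k)
    return outputs

def block_solution(inputs, **params):
    n_filters = params['n_filters']
    k = params['k']
    # channels are the last axis of the 4D tensor (batch, height, width, channels)
    in_channels = inputs.shape[-1]
    if in_channels != n_filters:
        # 1x1 linear projection so the residual add shapes match
        inputs = BatchNormalization()(inputs)
        inputs = Conv2D(n_filters, (1, 1), strides=(1, 1), padding='same')(inputs)
    # dimensionality expansion: block filters times the width factor
    outputs = BatchNormalization()(inputs)
    outputs = ReLU()(outputs)
    outputs = Conv2D(n_filters * k, (3, 3), strides=(1, 1), padding='same')(outputs)
    # dimensionality reduction back to the block's number of filters
    outputs = BatchNormalization()(outputs)
    outputs = ReLU()(outputs)
    outputs = Conv2D(n_filters, (3, 3), strides=(1, 1), padding='same')(outputs)
    # residual link: add the (projected) block input to the outputs
    outputs = Add()([inputs, outputs])
    return outputs

def classifier_solution(inputs, n_classes):
    outputs = GlobalAveragePooling2D()(inputs)
    # softmax for a multi-class, single-label classifier
    outputs = Dense(n_classes, activation='softmax')(outputs)
    return outputs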
Finally, let's do a bit of training with your WRN model.
Let's get the tf.keras built-in dataset for CIFAR-10. These are 32x32 color images (3 channels) across 10 classes (airplanes, cars, birds, cats, deer, dogs, frogs, horses, ships, and trucks). We will preprocess the image data by normalizing the pixel values to the range [0, 1] (preprocessing is not covered yet).
In [ ]:
from tensorflow.keras.datasets import cifar10
import numpy as np
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = (x_train / 255.0).astype(np.float32)
x_test = (x_test / 255.0).astype(np.float32)
In [ ]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.fit(x_train, y_train, epochs=3, batch_size=32, validation_split=0.1, verbose=1)
model.evaluate(x_test, y_test)